from unstructured.partition.ppt import partition_ppt
# Single source of truth for the deck location, shared by the primary parser
# and the python-pptx fallback so the two paths cannot drift apart.
pptx_path = "D:/ideaSpace/rag-in-action-master/90-文档-Data/黑悟空/黑神话悟空.pptx"
try:
    # The input is a .pptx file, so use the dedicated .pptx partitioner.
    # `partition_ppt` targets legacy .ppt files and, per the unstructured
    # docs, converts them through LibreOffice first. Importing inside the
    # `try` lets a missing optional dependency fall through to the fallback.
    from unstructured.partition.pptx import partition_pptx

    elements = partition_pptx(filename=pptx_path)
    # A deck with no extractable elements is still a successful parse; guard
    # the index so it is not misreported as a failure via IndexError.
    first_text = elements[0].text[:100] if elements else ""
    print("✅ 使用unstructured解析ppt成功！首段内容：", first_text)
except Exception as e:
    # Broad catch is deliberate: this is a top-level script boundary and any
    # failure of the primary parser should trigger the fallback below.
    print(f"❌ 使用unstructured解析ppt失败，尝试备用方案... Error: {e}")

    # Fallback: read the deck directly with python-pptx and collect the text
    # of every shape that exposes a `text` attribute.
    from pptx import Presentation
    prs = Presentation(pptx_path)
    texts = [shape.text for slide in prs.slides for shape in slide.shapes if hasattr(shape, "text")]
    print("使用python-pptx备用方案提取内容：", texts)

# Banner separating the image-parsing demo from the output above.
print("使用Unstructured解析图片-------------\n")

# Location of the image handed to the loader.
image_path = "D:/ideaSpace/rag-in-action-master/90-文档-Data/黑悟空/黑悟空英文.jpg"

# Imported here, after the banner, to keep the original order of side effects.
from langchain_community.document_loaders import UnstructuredImageLoader

# Build the loader for the image, load its documents, and print them.
loader = UnstructuredImageLoader(file_path=image_path)
data = loader.load()
print(data)